This notebook performs differential expression analysis of human endogenous retroviruses (HERVs) across cell-type clusters and brain regions from ASAP PMDBS snRNA-seq data.
Specifically, it: * Defines helper functions for extracting significant genes and visualizing expression (custom getSignName, getAverage, meanPlot_cus, and box_pseudobulk). * Loads TE (HERV) count data generated from trusTEr outputs and organizes them by region and cluster. * Builds DESeq2 models to compare PD vs. control expression across multiple clusters. * Produces summary tables of DE results, mean expression plots, and box plots for selected example HERVs. * Identifies overlaps of upregulated HERVs across cell types and regions using UpSet plots. * Compares in vivo upregulated HERVs in microglia with in vitro stimulation experiments.
Loading data and needed libraries
library(DESeq2)
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
##
## shift
## The following object is masked from 'package:GenomicRanges':
##
## shift
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
library(ggpubr)
## Loading required package: ggplot2
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%within%() masks IRanges::%within%()
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks IRanges::collapse()
## ✖ dplyr::combine() masks Biobase::combine(), BiocGenerics::combine()
## ✖ dplyr::count() masks matrixStats::count()
## ✖ dplyr::desc() masks IRanges::desc()
## ✖ tidyr::expand() masks S4Vectors::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first(), S4Vectors::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ purrr::reduce() masks GenomicRanges::reduce(), IRanges::reduce()
## ✖ dplyr::rename() masks S4Vectors::rename()
## ✖ lubridate::second() masks data.table::second(), S4Vectors::second()
## ✖ lubridate::second<-() masks S4Vectors::second<-()
## ✖ dplyr::slice() masks IRanges::slice()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
load("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/preprocessed_TE_data.RData")
source("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/processing/TE_heatmap_functions.R")
# Load samplesheet
samplesheet <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_snRNAseq/original/ASAP_samplesheet.xlsx")
# I want to split HERV_data$te_counts per region, but first I need to categorize the columns by region
clusters <- colnames(HERV_data$te_counts)[7:ncol(HERV_data$te_counts)]
clusters_per_region <- data.frame(cluster = clusters,
region = str_extract(clusters, "SN|PUT|PFC|AMY")) %>%
group_by(region) %>%
summarise(clusters = list(cluster)) %>%
ungroup()
expressed_hervs_bulkrna <- fread("/Volumes/MyPassport/ASAP/code/ASAP_PMDBS_bulkRNAseq/results/tables/expressed_fl_hervs_expressed.tab", data.table = F, header = F)$V1
# Now we loop through the regions and select only the region's columns
te_counts <- list()
for (i in 1:nrow(clusters_per_region)) {
region <- clusters_per_region$region[i]
cluster_cols <- clusters_per_region$clusters[[i]] # these are vectors of column names
# Subset counts dataframe by these columns and keep Geneid
te_counts[[region]] <- HERV_data$te_counts %>%
select(Geneid, all_of(cluster_cols)) %>%
filter(Geneid %in% expressed_hervs_bulkrna) # Expressed in bulk RNA
}
head(te_counts$SN)
getSignName(), getAverage(),
meanPlot_cus(), pick_colors_meanplot()Purpose: Extract significantly DE HERVs, compute mean expression per condition, and visualize pairwise mean comparisons.
## getSignName ##
# Get significantly different gene names.
# Taken from source code of the package deseqAbstraction which is no longer available on github.
# Credits to Per L. Brattås
# Parameters:
# x = results object from deseq
# p = padj threshold for significance
# l = log2FC threshold for significance
getSignName <- function(x,p,l=0) {
up <- x[!is.na(x$padj) & x$padj < p & x$log2FoldChange > l,]
down <- x[!is.na(x$padj) & x$padj < p & x$log2FoldChange < -l,]
return(list(up=rownames(up),down=rownames(down)))
}
## getAverage ##
# Get average expression (normalized by median of ratios) of each of the conditions in a deseq object.
# Taken from source code of the package deseqAbstraction which is no longer available on github.
# Credits to Per L. Brattås
# Parameters:
# dds = deseq object
getAverage <- function(dds) {
baseMeanPerLvl <- sapply( levels(dds$Dx), function(lvl) rowMeans( counts(dds,normalized=TRUE)[,dds$Dx == lvl] ) )
baseSDPerLvl <- sapply( levels(dds$Dx), function(lvl) apply( counts(dds,normalized=TRUE)[,dds$Dx == lvl],1,sd ) )
colnames(baseSDPerLvl) <- paste("st.dev:",colnames(baseSDPerLvl),sep="")
return(list(Mean=baseMeanPerLvl,SD=baseSDPerLvl))
}
meanPlot_cus <- function(exp,test,c1 = "condition 1",c2 = "condition 2",p=.05,l=0,id=F, ttl="",
repel=TRUE, col1="tomato", col2="darkturquoise", col3="grey", highlights=NA, select_rows = NA){
if(!all(is.na(select_rows))){
exp <- exp[select_rows,]
test <- test[select_rows,]
}
sign <- getSignName(x = test,p = p,l = l)
u <- sign$up
d <- sign$down
#color up and down sign..
colVec <- ifelse(test = (rownames(exp) %in% u),
yes = col1,
no = ifelse(test = (rownames(exp) %in% d),
yes = col2, no =col3))
colVec[is.na(colVec)] <- "steelblue" ## if NA make sure it's not counted as <p
#size of points
cexVec <- ifelse(test = (rownames(exp) %in% u),
yes = 0.35,
no = ifelse(test = (rownames(exp) %in% d),
yes = 0.35, no = 0.3))
exp_log <- as.data.frame(log2(exp[,c(c1, c2)] + 0.5))
exp_log$Name <- rownames(exp_log)
exp_log$reg <- factor(ifelse(exp_log$Name %in% u, paste('upregulated in ', c1, ' (', length(u), ')', sep =''),
ifelse(exp_log$Name %in% d, paste('downregulated in ', c1,' (', length(d), ')', sep =''), paste('not significant', ' (', (nrow(test) - length(u) - length(d)), ')', sep=''))))
library(ggrepel)
if(repel == TRUE){
plt <- ggplot(exp_log, aes(x=get(c2), y=get(c1), label=Name, color=reg)) + geom_point(aes(size=cexVec), alpha=0.7)+ scale_color_manual(values=c(col2, col3, col1))+ scale_size_continuous(range=c(1,2), guide="none")+ geom_text_repel(data = subset(exp_log, Name %in% u | Name %in% d),direction = "y", nudge_y = 0.4, nudge_x = -0.5)
}
else{
plt <- ggplot(exp_log, aes(x=get(c2), y=get(c1), color=reg)) + geom_point(aes(size=cexVec), alpha=0.7)+ scale_color_manual(values=c(col2, col3, col1))+ scale_size_continuous(range=c(1,2), guide="none")
}
plt <- plt + labs(x=paste("log2(mean ",c2,")",sep=""),
y=paste("log2(mean ",c1,")",sep=""),
title=paste(ttl, paste(c1," vs. ",c2,sep=""), sep = ': '),
subtitle=paste("p-adj < ",p,", log2(fc) > ",l,sep=""))+ theme_bw() +
theme( plot.title = element_text( size=14, face="bold"), text = element_text(size=15), panel.grid.minor = element_blank(), panel.grid.major = element_blank(), legend.title=element_blank())
# if(id == T) {
# identify(log2(exp[,1]),log2(exp[,2]),labels = rownames(exp))
# }
if(length(highlights) > 0){
plt <- plt + geom_point(data=exp_log[highlights,], aes(x=get(c2), y=get(c1)), colour="springgreen3", size=5, shape=1, stroke=2)
}
return(plt)
}
pick_colors_meanplot <- function(df){
if(any(df$padj < 0.05 &
df$log2FoldChange > 1, na.rm = TRUE)){ # There is upregulated
if(any(df$padj < 0.05 &
df$log2FoldChange < -1, na.rm = TRUE)){ # There is downregulated
col1 = "tomato"
col2 = "darkturquoise"
col3 = "grey"
}else{ # Upregulated, but not downregulated
col1 = NA
col2 = "grey"
col3 = "tomato"
}
}else{
if(any(df$padj < 0.05 & # There is downregulated but not upregulated
df$log2FoldChange < -1, na.rm = TRUE)){
col1 = NA
col2 = "darkturquoise"
col3 = "grey"
}else{
col1 = "grey"
col2 = "grey"
col3 = "grey"
}
}
return(list("col1" = col1,
"col2" = col2,
"col3" = col3))
}
te_dds_list <- list()
te_exp_list <- list()
te_res_list <- list()
te_res_list_df <- list()
regions <- c("SN", "PUT", "PFC", "AMY")
for(cluster in as.character(0:7)){
for(region in regions){
print(cluster)
print(region)
samplesheet_list <- split(samplesheet, f = samplesheet$Region)
rownames(samplesheet_list[[region]]) <- samplesheet_list[[region]]$Sample
samples_cluster <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
samples_cluster <- samples_cluster[which(samples_cluster %in% colnames(te_counts[[region]]))]
rownames(samplesheet_list[[region]]) <- paste(rownames(samplesheet_list[[region]]), cluster, sep="_")
# Expressed in snRNA
te_counts_expressed <- te_counts[[region]][which(rowSums(te_counts[[region]][,samples_cluster]) > 0),]
print(paste0("Cluster ", cluster, ", region ", region, " expresses ", nrow(te_counts_expressed)))
region_cluster <- paste0(region, "_", cluster)
te_dds_list[[region_cluster]] <- DESeqDataSetFromMatrix((te_counts_expressed[,samples_cluster]+1), samplesheet_list[[region]][samples_cluster,], design = ~ Dx)
te_dds_list[[region_cluster]]$Dx <- relevel(te_dds_list[[region_cluster]]$Dx, "Ctl")
error_handle <- tryCatch({
te_dds_list[[region_cluster]] <- DESeq(te_dds_list[[region_cluster]])
}, error = function(e) {
print(paste("Something happened with ", region, cluster))
return(1)
})
if(is.numeric(error_handle)){
if(error_handle == 1){
te_dds_list[[region_cluster]] <- NA
te_res_list[[region_cluster]] <- NA
te_res_list_df[[region_cluster]] <- NA
te_exp_list[[region_cluster]] <- NA
}
}else{
te_exp_list[[region_cluster]] <- getAverage(te_dds_list[[region_cluster]])
te_res_list[[region_cluster]] <- lfcShrink(te_dds_list[[region_cluster]], "Dx_PD_vs_Ctl")
te_res_list_df[[region_cluster]] <- as.data.frame(te_res_list[[region_cluster]])
te_res_list_df[[region_cluster]]$HERV_id <- rownames(te_res_list_df[[region_cluster]])
}
}
}
## [1] "0"
## [1] "SN"
## [1] "Cluster 0, region SN expresses 66"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## Warning in lfproc(x, y, weights = weights, cens = cens, base = base, geth =
## geth, : Estimated rdf < 1.0; not estimating variance
## final dispersion estimates
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PUT"
## [1] "Cluster 0, region PUT expresses 674"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 47 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "PFC"
## [1] "Cluster 0, region PFC expresses 724"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 19 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "0"
## [1] "AMY"
## [1] "Cluster 0, region AMY expresses 678"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 54 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "SN"
## [1] "Cluster 1, region SN expresses 451"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 82 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "PUT"
## [1] "Cluster 1, region PUT expresses 634"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 55 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "PFC"
## [1] "Cluster 1, region PFC expresses 699"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 17 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "1"
## [1] "AMY"
## [1] "Cluster 1, region AMY expresses 683"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 48 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "SN"
## [1] "Cluster 2, region SN expresses 647"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 86 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PUT"
## [1] "Cluster 2, region PUT expresses 682"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 81 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "PFC"
## [1] "Cluster 2, region PFC expresses 672"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 37 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "2"
## [1] "AMY"
## [1] "Cluster 2, region AMY expresses 670"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 78 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "SN"
## [1] "Cluster 3, region SN expresses 607"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 83 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PUT"
## [1] "Cluster 3, region PUT expresses 604"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 93 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "PFC"
## [1] "Cluster 3, region PFC expresses 601"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 31 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "3"
## [1] "AMY"
## [1] "Cluster 3, region AMY expresses 623"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 75 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "SN"
## [1] "Cluster 4, region SN expresses 526"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 59 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PUT"
## [1] "Cluster 4, region PUT expresses 493"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 40 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "PFC"
## [1] "Cluster 4, region PFC expresses 460"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 25 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "4"
## [1] "AMY"
## [1] "Cluster 4, region AMY expresses 504"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 35 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "SN"
## [1] "Cluster 5, region SN expresses 697"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 53 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PUT"
## [1] "Cluster 5, region PUT expresses 674"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 70 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "PFC"
## [1] "Cluster 5, region PFC expresses 659"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 77 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "5"
## [1] "AMY"
## [1] "Cluster 5, region AMY expresses 656"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 86 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "SN"
## [1] "Cluster 6, region SN expresses 287"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 39 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PUT"
## [1] "Cluster 6, region PUT expresses 212"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 13 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "PFC"
## [1] "Cluster 6, region PFC expresses 314"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 13 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "6"
## [1] "AMY"
## [1] "Cluster 6, region AMY expresses 257"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 21 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "SN"
## [1] "Cluster 7, region SN expresses 232"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 23 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PUT"
## [1] "Cluster 7, region PUT expresses 109"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "PFC"
## [1] "Cluster 7, region PFC expresses 62"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 1 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
## [1] "7"
## [1] "AMY"
## [1] "Cluster 7, region AMY expresses 107"
## converting counts to integer mode
## Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
## design formula are characters, converting to factors
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## -- note: fitType='parametric', but the dispersion trend was not well captured by the
## function: y = a/x + b, and a local regression fit was automatically substituted.
## specify fitType='local' or 'mean' to avoid this message next time.
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 10 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
openxlsx::write.xlsx(te_res_list_df, paste("/Volumes/MyPassport/ASAP/data/ASAP_PMDBS_snRNAseq/results/tables/HERV_PDvsCtl_res.xlsx", sep=""))
meanPlot_cus())mean_plots_list <- list()
te_res_expressed_list_df <- list()
for(region_cluster in names(te_exp_list)){
if(!all(is.na(te_res_list[[region_cluster]]))){
region <- sapply(str_split(region_cluster, "_"), `[[`, 1)
effect <- unique(samplesheet_list[[region]][which(samplesheet_list[[region]]$Dx != "Ctl"),"Dx"])
colors_list <- pick_colors_meanplot(te_res_list[[region_cluster]])
mean_plots_list[[region_cluster]] <- meanPlot_cus(exp = te_exp_list[[region_cluster]]$Mean,
test = te_res_list[[region_cluster]],
col1=colors_list$col1, col2=colors_list$col2, col3=colors_list$col3,
c1 = effect, c2="Ctl", p = 0.05, l=1, ttl = region_cluster, repel = F)
}
}
# Neurons (cluster 0):
# Very little number of nuclei in SN in this cluster - ignored
mean_plots_list$PUT_0
## Warning: Removed 674 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_0
## Warning: Removed 724 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_0
## Warning: Removed 678 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Neurons (cluster 1)
mean_plots_list$PUT_1
## Warning: Removed 634 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_1
## Warning: Removed 699 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_1
## Warning: Removed 683 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Astrocytes
mean_plots_list$SN_2
## Warning: Removed 647 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_2
## Warning: Removed 682 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_2
## Warning: Removed 672 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_2
## Warning: Removed 670 rows containing missing values or values outside the scale range
## (`geom_point()`).
# OPCs
mean_plots_list$SN_3
## Warning: Removed 607 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_3
## Warning: Removed 604 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_3
## Warning: Removed 601 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_3
## Warning: Removed 623 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Microglia
mean_plots_list$SN_4
## Warning: Removed 526 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_4
## Warning: Removed 493 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_4
## Warning: Removed 460 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_4
## Warning: Removed 504 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Oligodendrocytes
mean_plots_list$SN_5
## Warning: Removed 697 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PUT_5
## Warning: Removed 674 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$PFC_5
## Warning: Removed 659 rows containing missing values or values outside the scale range
## (`geom_point()`).
mean_plots_list$AMY_5
## Warning: Removed 656 rows containing missing values or values outside the scale range
## (`geom_point()`).
# VLMCs and Tcells skipped as their clusters are too small
box_pseudobulk() functionbox_pseudobulk <- function(df, res, gene, log2=F, jitter.height = 0){
if(!gene %in% rownames(df)){
return("This gene was not found")
}
tmp_microglia_df_melt <- reshape2::melt(df[gene,])
tmp_microglia_df_melt$sample <- rownames(tmp_microglia_df_melt)
cluster <- unique(sapply(str_split(rownames(tmp_microglia_df_melt), "_"), tail, 1))
tmp_samplesheet <- samplesheet
tmp_samplesheet$sample_cluster <- paste(tmp_samplesheet$Sample, cluster, sep = "_")
tmp_samplesheet <- tmp_samplesheet[which(tmp_samplesheet$sample_cluster %in% rownames(tmp_microglia_df_melt)),]
if(nrow(tmp_microglia_df_melt) == nrow(tmp_samplesheet)){
tmp_microglia_df_melt <- merge(tmp_microglia_df_melt, tmp_samplesheet, by.x="sample", by.y="sample_cluster")
tmp <- res[gene,]
tmp$group1 <- "PD"
tmp$group2 <- "Ctl"
tmp$`.y.` <- "value"
tmp$p.format <- format(tmp$pvalue, digits = 3, scientific = T)
tmp$log2FoldChange <- round(tmp$log2FoldChange, digits = 3)
# If a p-value is less than 0.05, it is flagged with one star (*). If a p-value is less than 0.01, it is flagged with 2 stars (**). If a p-value is less than 0.001, it is flagged with three stars (***).
tmp$p.signif <- ifelse(tmp$pvalue < 0.001, "***",
ifelse(tmp$pvalue < 0.01, "**",
ifelse(tmp$pvalue < 0.05, "*", "ns") ) )
tmp$pformat_log2FC <- paste("pvalue=", tmp$p.format, ";\nlog2FC=", tmp$log2FoldChange, sep="")
tmp$pformat_log2FC_star <- ifelse(tmp$p.signif != "ns", paste("log2FC=", tmp$log2FoldChange, "\n", tmp$p.signif, sep=""), tmp$p.signif)
tmp$Dx <- "Dx"
if(log2){
plt <- ggplot(tmp_microglia_df_melt, aes(x=Dx, y=log2(value+1), fill=Dx)) +
geom_jitter(aes(color=Dx), size=1, width = 0.1, height = 0) + ylim(c(min(log2(tmp_microglia_df_melt$value+1)), max(log2(tmp_microglia_df_melt$value+1)) + sd(log2(tmp_microglia_df_melt$value+1)))) + labs(x="Region", y="log2(Normalized Expression)") +
stat_pvalue_manual(tmp, y.position = max(log2(tmp_microglia_df_melt$value+1)) + 0.05*(max(log2(tmp_microglia_df_melt$value+1))), label = "pformat_log2FC")
}else{
plt <- ggplot(tmp_microglia_df_melt, aes(x=Dx, y=value, fill=Dx)) +
geom_jitter(aes(color=Dx), size=1, width = 0.1, height = 0) +
ylim(c(min(tmp_microglia_df_melt$value), max(tmp_microglia_df_melt$value) + sd(tmp_microglia_df_melt$value))) + labs(x="Region", y="Normalized Expression") +
stat_pvalue_manual(tmp, y.position = max(tmp_microglia_df_melt$value) + 0.05*(max(tmp_microglia_df_melt$value)), label = "pformat_log2FC")
}
plt +
geom_boxplot(outliers = F, width=0.3, size=0.3, alpha=0.5) +
stat_summary( geom = "point", fun.y = "mean",size=2, col = "black", shape = 24) +
stat_summary(geom = "line", fun.y = mean, group = 1, linetype="dashed") +
theme_bw() +
theme(text = element_text(size = 15)) +
facet_wrap(.~Region, scales = "free", ncol=4) +
scale_fill_manual(values = c("Ctl" = "lightgrey", "PD" = "tomato")) +
scale_color_manual(values = c("Ctl" = "grey", "PD" = "tomato")) + ggtitle(gene)
}
}
herv_counts_norm <- list()
for(region_cluster in names(te_dds_list)){
herv_counts_norm[[region_cluster]] <- list()
error_handle <- tryCatch({
herv_counts_norm[[region_cluster]] <- counts(te_dds_list[[region_cluster]], normalize = T)
}, error = function(e) {
print(paste("Something happened with ", region_cluster))
return(1)
})
}
samplesheet_list[["SN"]]$sample_cluster <- paste(samplesheet_list$SN$Sample, "4", sep="_")
samplesheet_list[["PUT"]]$sample_cluster <- paste(samplesheet_list$PUT$Sample, "4", sep="_")
plot_list_sn <- list()
plot_list_put <- list()
for (herv in c("HERV_dup880", te_res_list_df$SN_4[which(te_res_list_df$SN_4$padj < 0.05 & te_res_list_df$SN_4$log2FoldChange > 0),"TE_id"])){
plot_list_sn[[herv]] <- box_pseudobulk(df = herv_counts_norm$SN_4,res = te_res_list_df$SN_4,gene = herv, log2=F)
plot_list_put[[herv]] <- box_pseudobulk(df = herv_counts_norm$PUT_4, te_res_list_df$PUT_4, herv, log2=F)
}
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
box_pseudobulk(df = herv_counts_norm$SN_4, te_res_list_df$SN_4, "HERV_dup880", log2=T)
box_pseudobulk(df = herv_counts_norm$PUT_4, te_res_list_df$PUT_4, "HERV_dup880", log2=T)
box_pseudobulk(df = herv_counts_norm$PFC_4, te_res_list_df$PFC_4, "HERV_dup880", log2=T)
box_pseudobulk(df = herv_counts_norm$AMY_4, te_res_list_df$AMY_4, "HERV_dup880", log2=T)
box_pseudobulk(df = herv_counts_norm$PUT_1, te_res_list_df$PUT_1, "HERV_dup2896", log2=T)
box_pseudobulk(df = herv_counts_norm$PFC_1, te_res_list_df$PFC_1, "HERV_dup2896", log2=T)
box_pseudobulk(df = herv_counts_norm$AMY_1, te_res_list_df$AMY_1, "HERV_dup2896", log2=T)
box_pseudobulk(df = herv_counts_norm$SN_1, te_res_list_df$SN_1, "HERV_dup3355", log2=T)
box_pseudobulk(df = herv_counts_norm$PUT_1, te_res_list_df$PUT_1, "HERV_dup3355", log2=T)
box_pseudobulk(df = herv_counts_norm$PFC_1, te_res_list_df$PFC_1, "HERV_dup3355", log2=T)
box_pseudobulk(df = herv_counts_norm$AMY_1, te_res_list_df$AMY_1, "HERV_dup3355", log2=T)
get_significant_HERVs <- function(df){
det <- df[which(df$padj < 0.05 & df$log2FoldChange > 1), "HERV_id"]
if (length(det) > 0){
return(det)
}else{
return(NA)
}
}
library(grid)
library(UpSetR)
celltypes <- c("Exc Neurons", "Inh Neurons", "Astrocytes", "OPCs", "Oligos", "Microglia", "Endothelial", "T cells")
# Astrocytes
upset(fromList(sapply(list("SN_2" = te_res_list_df$SN_2,
"PUT_2" = te_res_list_df$PUT_2,
"PFC_2" = te_res_list_df$PFC_2,
"AMY_2" = te_res_list_df$AMY_2), get_significant_HERVs)))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
grid.text("Astrocytes", vjust = -17, hjust= -3)
# Microglia
upset(fromList(sapply(list("SN_4" = te_res_list_df$SN_4,
"PUT_4" = te_res_list_df$PUT_4,
"PFC_4" = te_res_list_df$PFC_4,
"AMY_4" = te_res_list_df$AMY_4), get_significant_HERVs)))
grid.text("Microglia", vjust = -17, hjust= -3)
# OPCs
upset(fromList(sapply(list("SN_3" = te_res_list_df$SN_3,
"PUT_3" = te_res_list_df$PUT_3,
"PFC_3" = te_res_list_df$PFC_3,
"AMY_3" = te_res_list_df$AMY_3), get_significant_HERVs)))
grid.text("OPCs", vjust = -17, hjust= -3)
# Oligos
upset(fromList(sapply(list("SN_5" = te_res_list_df$SN_5,
"PUT_5" = te_res_list_df$PUT_5,
"PFC_5" = te_res_list_df$PFC_5,
"AMY_5" = te_res_list_df$AMY_5), get_significant_HERVs)))
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
grid.text("Oligodendrocytes", vjust = -17, hjust= 0)
# Neurons cluster 1
upset(fromList(sapply(list("PUT_1" = te_res_list_df$PUT_1,
"PFC_1" = te_res_list_df$PFC_1,
"AMY_1" = te_res_list_df$AMY_1), get_significant_HERVs)))
grid.text("Neurons cluster 1", vjust = -17, hjust= -3)
# All in SN
upset(fromList(list(
# "OPCs" = get_significant_HERVs(te_res_list_df$SN_3),
"Oligos" = get_significant_HERVs(te_res_list_df$SN_5),
"Microglia" = get_significant_HERVs(te_res_list_df$SN_4),
# "VLMCs" = get_significant_HERVs(te_res_list_df$SN_6),
# "T cells" = get_significant_HERVs(te_res_list_df$SN_7),
# "Exc Neurons" = get_significant_HERVs(te_res_list_df$SN_0),
# "Inh Neurons" = get_significant_HERVs(te_res_list_df$SN_1),
"Astrocytes" = get_significant_HERVs(te_res_list_df$SN_2))),
sets = c("Oligos", "Microglia", "Astrocytes"), nintersects = 100, nsets = 100, mainbar.y.max = 30)
grid.text("Substantia nigra", vjust = -17, hjust= 0)
# All in PUT
upset(fromList(list("OPCs" = get_significant_HERVs(te_res_list_df$PUT_3),
"Oligos" = get_significant_HERVs(te_res_list_df$PUT_5),
"Microglia" = get_significant_HERVs(te_res_list_df$PUT_4),
# "VLMCs" = get_significant_HERVs(te_res_list_df$PUT_6),
# "T cells" = get_significant_HERVs(te_res_list_df$PUT_7),
# "Exc Neurons" = get_significant_HERVs(te_res_list_df$PUT_0),
"Inh Neurons" = get_significant_HERVs(te_res_list_df$PUT_1),
"Astrocytes" = get_significant_HERVs(te_res_list_df$PUT_2))), sets = c("OPCs", "Oligos", "Microglia", "Inh Neurons", "Astrocytes"), nintersects = 100, nsets = 100, mainbar.y.max = 30)
grid.text("Putamen", vjust = -17, hjust= 0)
# All in AMY
upset(fromList(list("Exc Neurons" = get_significant_HERVs(te_res_list_df$AMY_0),
# "Inh Neurons" = get_significant_HERVs(te_res_list_df$AMY_1),
"Astrocytes" = get_significant_HERVs(te_res_list_df$AMY_2)
# "OPCs" = get_significant_HERVs(te_res_list_df$AMY_3),
# "Oligos" = get_significant_HERVs(te_res_list_df$AMY_5),
# "Microglia" = get_significant_HERVs(te_res_list_df$AMY_4),
# "VLMCs" = get_significant_HERVs(te_res_list_df$AMY_6),
# "T cells" = get_significant_HERVs(te_res_list_df$AMY_7)
)), sets = c("Exc Neurons", "Astrocytes"), nintersects = 100, nsets = 100, mainbar.y.max = 30)
## `geom_line()`: Each group consists of only one observation.
## ℹ Do you need to adjust the group aesthetic?
grid.text("Amygdala", vjust = -17, hjust= 0)
upset(fromList(list("SN: OPCs" = get_significant_HERVs(te_res_list_df$SN_3),
"SN: Oligos" = get_significant_HERVs(te_res_list_df$SN_5),
"SN: Microglia" = get_significant_HERVs(te_res_list_df$SN_4),
"SN: VLMCs" = get_significant_HERVs(te_res_list_df$SN_6),
"SN: T cells" = get_significant_HERVs(te_res_list_df$SN_7),
"SN: Neurons cluster 0" = get_significant_HERVs(te_res_list_df$SN_0),
"SN: Neurons cluster 1" = get_significant_HERVs(te_res_list_df$SN_1),
"SN: Astrocytes" = get_significant_HERVs(te_res_list_df$SN_2),
"PUT: OPCs" = get_significant_HERVs(te_res_list_df$PUT_3),
"PUT: Oligos" = get_significant_HERVs(te_res_list_df$PUT_5),
"PUT: Microglia" = get_significant_HERVs(te_res_list_df$PUT_4),
"PUT: VLMCs" = get_significant_HERVs(te_res_list_df$PUT_6),
"PUT: T cells" = get_significant_HERVs(te_res_list_df$PUT_7),
"PUT: Neurons cluster 0" = get_significant_HERVs(te_res_list_df$PUT_0),
"PUT: Neurons cluster 1" = get_significant_HERVs(te_res_list_df$PUT_1),
"PUT: Astrocytes" = get_significant_HERVs(te_res_list_df$PUT_2),
"PFC: OPCs" = get_significant_HERVs(te_res_list_df$PFC_3),
"PFC: Oligos" = get_significant_HERVs(te_res_list_df$PFC_5),
"PFC: Microglia" = get_significant_HERVs(te_res_list_df$PFC_4),
"PFC: VLMCs" = get_significant_HERVs(te_res_list_df$PFC_6),
"PFC: T cells" = get_significant_HERVs(te_res_list_df$PFC_7),
"PFC: Neurons cluster 0" = get_significant_HERVs(te_res_list_df$PFC_0),
"PFC: Neurons cluster 1" = get_significant_HERVs(te_res_list_df$PFC_1),
"PFC: Astrocytes" = get_significant_HERVs(te_res_list_df$PFC_2),
"AMY: OPCs" = get_significant_HERVs(te_res_list_df$AMY_3),
"AMY: Oligos" = get_significant_HERVs(te_res_list_df$AMY_5),
"AMY: Microglia" = get_significant_HERVs(te_res_list_df$AMY_4),
"AMY: VLMCs" = get_significant_HERVs(te_res_list_df$AMY_6),
"AMY: T cells" = get_significant_HERVs(te_res_list_df$AMY_7),
"AMY: Neurons cluster 0" = get_significant_HERVs(te_res_list_df$AMY_0),
"AMY: Neurons cluster 1" = get_significant_HERVs(te_res_list_df$AMY_1),
"AMY: Astrocytes" = get_significant_HERVs(te_res_list_df$AMY_2))), nintersects = 100, nsets = 100, mainbar.y.max = 30)
Load HERV upregulation results from IFNγ, TNFα, and PolyIC-stimulated H9 microglia Compare overlaps with in vivo upregulated HERVs in SN and PUT microglia Visualize with UpSet plots and intersections
invitro_microglia <- list()
invitro_microglia[["microglia.H9.IFNg"]] <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_invitro/results/tables/upset_HERV_upreg_microglia_IFNg_TNFa_PolyIC.xlsx", sheet = "microglia.H9.IFNg", colNames = F)
invitro_microglia[["microglia.H9.TNFa"]] <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_invitro/results/tables/upset_HERV_upreg_microglia_IFNg_TNFa_PolyIC.xlsx", sheet = "microglia.H9.TNFa", colNames = F)
invitro_microglia[["microglia.H9.PolyIC"]] <- read.xlsx("/Volumes/MyPassport/ASAP/code/ASAP_invitro/results/tables/upset_HERV_upreg_microglia_IFNg_TNFa_PolyIC.xlsx", sheet = "microglia.H9.PolyIC", colNames = F)
upset(fromList(list("SN_microglia" = te_res_list_df$SN_4[which(te_res_list_df$SN_4$log2FoldChange > 1), "HERV_id"],
"PUT_microglia" = te_res_list_df$PUT_4[which(te_res_list_df$PUT_4$log2FoldChange > 1), "HERV_id"],
"microglia.H9.IFNg" = invitro_microglia[["microglia.H9.IFNg"]]$X1,
"microglia.H9.TNFa" = invitro_microglia[["microglia.H9.TNFa"]]$X1,
"microglia.H9.PolyIC" = invitro_microglia[["microglia.H9.PolyIC"]]$X1)))
intersect(te_res_list_df$PUT_4[which(te_res_list_df$PUT_4$log2FoldChange > 1), "HERV_id"], invitro_microglia[["microglia.H9.IFNg"]]$X1)
## [1] "HERV_dup300"
intersect(te_res_list_df$SN_4[which(te_res_list_df$SN_4$log2FoldChange > 1), "HERV_id"], invitro_microglia[["microglia.H9.IFNg"]]$X1)
## [1] "HERV_dup880" "HERV_dup2789"
# Save functions in a separate file
dump(
c("getSignName", "getAverage",
"meanPlot_cus", "box_pseudobulk", "pick_colors_meanplot"),
file = "TE_DEA_functions.R"
)
saveRDS(herv_counts_norm, "HERV_norm_expression.Rdata")